
*************************************************
* Merge Ficus-Fare, DADS and SINE
*************************************************

* Step 1: firm-year file = Ficus-Fare joined with the aggregated DADS
* firm-level files (2002-2017). The result is stored as sine_panel.dta and
* is overwritten again further down once the SINE template is attached.
use "${file}ficusfare_all.dta", clear
merge 1:1 YEAR SIREN Y using "${file}dads_ent_all.dta"
drop _merge

* employment variables: system-missing means "not available" -> recode to 0
foreach dadsvar of varlist DADS_S_BRUT DADS_TREFFEN DADS_EFF_0101 DADS_EFF_3112 {
    replace `dadsvar' = 0 if `dadsvar' == .
}

* enforce one row per firm (SIREN) and calendar year (Y)
duplicates drop SIREN Y, force
save "${file}sine_panel.dta", replace

// Panel template: every SINE respondent is replicated 30 times and the
// copies are numbered Y = YEAR, YEAR+1, ..., YEAR+29 so that yearly balance
// sheet and employment data can be attached row by row.
use "${file}sinex.dta", clear // Aggregated SINE surveys from 2002-2018
expand 30
bysort SIREN YEAR: generate Y = YEAR + _n - 1

// attach FICUS_FARE_DADS; rows that exist only in the using file are discarded
merge 1:1 YEAR SIREN Y using "${file}sine_panel.dta"
keep if _merge != 2
drop _merge

// Companies that change SIREN during their lifecycle: temporarily put the
// second identifier (SIREN2) in the SIREN slot, merge again with -update-
// (only missing values in memory are filled), then restore the names.
rename (SIREN SIREN2) (SIREN3 SIREN)
merge m:1 YEAR SIREN Y using "${file}sine_panel.dta", update
keep if _merge != 2
drop _merge
rename (SIREN3 SIREN) (SIREN SIREN2)

// Trim the panel to the years covered (unbalanced panel)
drop if Y > 2018
// NOTE(review): MAX is the largest template year Y left in the cohort-firm
// cell, not the last year actually matched in ficus-fare-dads; Y is generated
// for every row, so the MAX==. branch below may never fire -- confirm intent.
bysort YEAR SIREN: egen MAX = max(Y)
label variable MAX "Y max available ficus-dads"
replace MAX = 2018 if YEAR == 2018 | MAX == 2017 // adjust max year to 2018
order YEAR SIREN Y DCES DATCSY MAX APE*
// keep only the first row of a cell when MAX is missing
bysort SIREN YEAR: drop if _n > 1 & MAX == .
// drop rows dated after MAX
drop if Y > MAX
duplicates drop SIREN Y, force
sort YEAR SIREN Y
save "${file}sine_panel.dta", replace

// Define size and tangible-asset ratio at year 0 (Y == YEAR, the cohort year).
// When the year-0 value is missing, the value of the following year (f1.) is
// used instead. Both variables are only filled on the Y == YEAR row.
use "${file}sine_panel.dta", clear
egen siren_num=group(SIREN)
xtset  siren_num Y
gen tangible0=tangible if Y==YEAR
gen lnasset0=lnasset if Y==YEAR
bys siren_num: replace tangible0=f1.tangible if Y==YEAR&tangible0==.
bys siren_num: replace  lnasset0=f1.lnasset if Y==YEAR&lnasset0==.
// The dollar signs are escaped: inside a double-quoted string Stata expands
// $name as a global macro, so an unescaped "$_" would be read as a reference
// to a global called "_" and the LaTeX subscript would be lost. "\$" stores a
// literal "$" in the label.
label var tangible0 "Tangible/total assets\$_{t=0}\$"
label var lnasset0 "Log(total assets)\$_{t=0}\$"
save "${file}sine_panel.dta", replace

*************************************************
* Adjust Sector Classification in Panel
*************************************************
use "${file}sine_panel.dta", clear

* Diagnostics: in which cohorts are the sector codes / location code missing?
tabulate YEAR if missing(APE2_sine) // APE2 classification is not systematically available in 2002 and before
tabulate YEAR if missing(APE1_sine) // APE1 is not used after 2006
tabulate YEAR if missing(DEPCOMA)

* Several sources carry a sector code; tag each by origin and discard the
* ones that are not used below.
order YEAR SIREN Y APE*
rename (APE1 APE2 APEda) (APE1_liasse APE2_liasse APE_dads) // sectors from different sources
drop APE_dads APE1bis_sine APE2bis_sine

* APE2: SINE code for every cohort except 2002, where the ficus-fare
* (liasse) code is taken instead.
generate APE2 = APE2_sine if YEAR != 2002
replace APE2 = APE2_liasse if YEAR == 2002

* Make the sector constant within firm over the panel years:
* first carry the previous year's code forward ...
sort SIREN Y
by SIREN: replace APE2 = APE2[_n-1] if APE2 == ""
by SIREN: replace APE2_liasse = APE2_liasse[_n-1] if APE2_liasse == ""
* ... then pull later codes backwards, one row per pass (20 passes)
sort SIREN Y
forvalues pass = 1/20 {
    by SIREN: replace APE2 = APE2[_n+1] if APE2 == ""
    by SIREN: replace APE2_liasse = APE2_liasse[_n+1] if APE2_liasse == ""
}
* last resort for the 2002 cohort: fall back on the SINE code
replace APE2 = APE2_sine if YEAR == 2002 & APE2 == ""

* Rows still without an APE rev2 code are removed - only obs in 2002 and
* before should be deleted (checked by the tabulation).
tabulate YEAR if missing(APE2)
keep if APE2 != ""

tabulate YEAR if missing(APE1_sine)
generate APE1 = APE1_sine

label variable APE2 "APE rev 2 main SIC-5"
label variable APE1 "APE rev 1 (2002 only)"

save "${file}sine_panel.dta", replace

*****************************************************
* Define Female-dominated sectors (SINE)
*****************************************************
* Share of female founders per sector and cohort, computed on the SINE
* cross-section (one row per firm, the row where Y equals the cohort YEAR).
use "${file}sine_panel.dta", clear
keep YEAR SIREN Y FEMALE APE2
sort SIREN YEAR Y
// keep only one obs per company (repeated cross section)
keep if YEAR == Y
tabulate YEAR if missing(APE2)

// attach the coarser levels of the rev2 sector classification
generate niv5 = APE2
merge m:1 niv5 using "${file}nafniveauxrev2.dta"
keep if _merge != 2
drop _merge

rename (niv1 niv2 niv3 niv4 niv5) (NAF1 NAF2 NAF3 NAF4 NAF5)
keep SIREN Y NAF* APE* YEAR FEMALE
drop if NAF5 == ""

// For each classification level (finest to coarsest) and cohort:
//   NAF<k>_sine_n = number of rows in the cell
//   NAF<k>_sine_f = sum of FEMALE in the cell divided by that number
foreach lvl in 5 4 3 2 1 {
    bysort NAF`lvl' YEAR: generate NAF`lvl'_sine_n = _N
    bysort NAF`lvl' YEAR: egen NAF`lvl'_sine_f = sum(FEMALE)
    bysort NAF`lvl' YEAR: replace NAF`lvl'_sine_f = NAF`lvl'_sine_f / _N
}

// level-1 results are not carried into the output file
keep YEAR SIREN NAF5_sine_n NAF5_sine_f NAF4_sine_n NAF4_sine_f NAF2_sine_n NAF2_sine_f NAF3_sine_n NAF3_sine_f
drop if YEAR < 2002
save "${file}female_sine.dta", replace

*************************************************
* Add sector-level characteristics
*************************************************
use "${file}sine_panel.dta", clear

* Survival indicators (dataset defined with the sine panel); firms without a
* value are coded 0.
merge m:1 SIREN using "${file}survie.dta"
keep if _merge != 2
drop _merge
foreach surv in SURV3 SURVIE3 SURV5 SURVIE5 {
    replace `surv' = 0 if missing(`surv')
}

* Innovative sectors: a sector is flagged when its APE2 code is found in INOVA
merge m:1 APE2 using "${file}INOVA.dta"
keep if _merge != 2
generate APE2_INOV = (_merge == 3)
drop _merge
label variable APE2_INOV "Innovative sector"

* Sector characteristics by NAF5 and calendar year
merge m:1 NAF5 Y using "${file}characteristics_naf.dta"
keep if _merge != 2
drop _merge

* Rebuild NAF5 for the female-share merges: rev1 code before 2008,
* rev2 code from 2008 on
drop NAF5
generate NAF5 = APE1 if Y < 2008
replace NAF5 = APE2 if Y >= 2008

* Female business owners by sector-year
merge m:1 NAF5 Y using "${file}female_sector.dta"
keep if _merge != 2
drop _merge

* Female business owners among creations by sector-year
merge m:1 NAF5 Y using "${file}female_creation.dta"
keep if _merge != 2
drop _merge

* Female shares computed on SINE (firm-cohort level file built above)
merge m:1 YEAR SIREN using "${file}female_sine.dta"
keep if _merge != 2
drop _merge



**********************************************
* Define Dummy female-dominated sector at different SIC-levels
**********************************************

* Sector levels: rev2 hierarchy first, then (for years before 2008) the rev1
* code is put in niv5 and the rev1 hierarchy fills what is still missing.
generate niv5 = APE2
merge m:1 niv5 using "${file}nafniveauxrev2.dta"
keep if _merge != 2
drop _merge
replace niv5 = APE1 if Y < 2008
merge m:1 niv5 using "${file}nafniveauxrev1.dta", update
keep if _merge != 2
drop _merge
drop NAF5 NAF4 NAF3 NAF2 NAF1
rename (niv1 niv2 niv3 niv4 niv5) (NAF1 NAF2 NAF3 NAF4 NAF5)

set more off
* For levels 2 to 5 and for each source of the female share
*   sine  = female entrepreneurs
*   siren = female business owners
*   crea  = creation of small businesses
* build NAF<k>_<source>_d5 = 1 when the share is at least 0.5, 0 otherwise,
* and system-missing when the share is system-missing.
forvalues lvl = 2/5 {
    foreach src in sine siren crea {
        local share NAF`lvl'_`src'_f
        local dummy NAF`lvl'_`src'_d5

        summarize `share', detail
        generate `dummy' = (`share' >= 0.5)
        replace `dummy' = . if `share' == .
        tabulate `dummy'
    }
}

save  "${file}sine_panel.dta", replace


***********************************
* LABELS
***********************************
* Attach display labels to the SINE survey variables.
* Changes versus the previous version of this section:
*   - typo fixes in two labels ("foreced" -> "forced", "Starup" -> "Startup")
*   - GRANDEECO was labelled twice; only the label that was applied last
*     ("Grande ecole") is kept, so the stored label is unchanged.
* NOTE(review): several labels embed raw "$" for LaTeX math; verify with
* -describe- that Stata's global-macro expansion leaves them intact and
* escape them as "\$" otherwise.
use "${file}sine_panel.dta", clear

* --- founder profile ---
label var FEMALE "Female"
label var YEAR "Cohort"
label var NATIOA1 "French national"
label var AGED "Age>40"

label var EXPERT "Industry expert"
label var SERIAL "Serial entrepreneur"
label var SUPEREXPERT "Super expert"
label var SUPERSERIAL "Super serial"
label var EDUCHIGHER "College education"
label var INC "Incorporated"
label var STARTUP "Start-up"
label var PROCHA "Entrepreneurial family"
label var STATUS1 "Previously self-employed"
label var STATUS2 "Previously CEO"
label var STATUS3 "Previously employee"
label var STATUS4 "Previously student"
label var STATUS5 "Previously Unemployed"
label var QUALIF1 "Previously white-collar" 
label var QUALIF2 "Previously skilled employee"
label var QUALIF3 "Previously unskilled employee"
label var QUALIF4 "Previously blue-collar"

* --- household, education, co-founders ---
label var COUPL "Married"
label var ENF "Children"
label var EDUC0 "No degree"
label var EDUC1 "High school"
label var EDUC2 "Undergraduate"
label var EDUC3 "Graduate"
label var GRANDEECO "Grande ecole"
label var DIRIG1A "No co-founder"
label var DIRIG2A "Co-founded with spouse"
label var DIRIG3A "Co-founded with family"
label var DIRIG4A "Co-founded with business partners"
label var DIRIGA "Co-founder(s)"
label var CCD "Co-founded with spouse"
label var CCA "Work with spouse"

* --- objectives, motivations, difficulties ---
label var OBJECTIF "Growth-oriented"
label var MOTIV0 "Other reasons"
label var MOTIV1 "Motivation Independence"
label var MOTIV2 "Motivation Taste"
label var MOTIV4 "Motivation Opportunity"
label var MOTIV3 "Motivation New idea"
label var MOTIV5 "Motivation Successful peers"
label var MOTIV6 "Motivation Unemployed"
label var MOTIV7 "Unemployed, forced"
label var MOTIV67 "Unemployed" 
label var DIFF0 "Difficulties - Nothing"
label var DIFF1 "Difficulties - Feeling lonely"
label var DIFF2 "Difficulties - Hiring skilled workers"
label var DIFF3 "Difficulties - Pricing products"
label var DIFF4 "Difficulties - Finding location"
label var DIFF5 "Difficulties - Clients"
label var DIFF6 "Difficulties - Administrative tasks"
label var DIFF7 "Difficulties - Getting fundings" 
label var DIFF8 "Difficulties - Bank overdraft"
label var DIFF9 "Difficulties - Open bank account" 

* --- other income, customers, innovation ---
label var REVENB1 "Spouse income" 
label var REVENB2 "Other entrepreneur income " 
label var REVENB3 "Employment income" 
label var REVENB4 "Other income (benefits)" 
label var REVENB5 "No other income" 
label var B2B "B2B business"
label var B2C "B2C business"
label var GEOCLIA1 "Local customers"
label var GEOCLIA2 "Domestic customers" 
label var GEOCLIA3 "International customers"
label var NBCLIA1 "1 or 2 customers"
label var NBCLIA2 "3 to 10 customers" 
label var NBCLIA3 "Many customers"
label var NBCLIA4 "Many customers, a few big ones"
label var INOVP "Innovative business - Product"
label var INOVF "Innovative business - Production"
label var INOVM "Innovative business - Marketing"
label var INOVOR "Innovative business - Organization"
label var INOV "Innovative sector (SIC-5)"
label var INOV_SUM "Innovative business"

* --- financing sources ---
label var FINV4 "External equity"
label var FINV4soc "Business equity"
label var FINV4vc "Venture capital"
label var FINV1 "Bank loan" // corporate
label var FINV2 "Personal loan" // personal
label var FINV12 "Bank loans" // all
label var FINV3 "Other loans"
label var FINV5 "Public grant"
label var FINV6 "Crowdfunding & Microcredit" 

* --- startup capital brackets ---
label var MOYENS6A "Startup capital"
label var MOYENS_2K "Startup Capital <2k"
label var MOYENS_4K "Startup Capital $\[$2-8$\[$k$\euro$"
label var MOYENS_8K "Startup Capital $\[$8-16$\[$k$\euro$"
label var MOYENS_16K "Startup Capital $\[$16-40$\[$k$\euro$"
label var MOYENS_40K "Startup Capital $\[$40-80$\[$k$\euro$"
label var MOYENS_80K "Startup Capital $\[$80-160$\[$k$\euro$"
label var MOYENS_160K "Startup Capital $\geq$160k"

save  "${file}sine_panel.dta", replace


*************************************************
* Define interaction variables & labels
*************************************************
* Interactions of founder / firm characteristics with FEMALE, plus labels.
* Every interaction is a plain product, so it is missing whenever either
* factor is missing.
use "${file}sine_panel.dta", clear

// define interaction variables 
gen MOTIV1F=MOTIV1*FEMALE
gen MOTIV2F=MOTIV2*FEMALE
gen MOTIV3F=MOTIV3*FEMALE
gen MOTIV4F=MOTIV4*FEMALE
gen MOTIV5F=MOTIV5*FEMALE
gen MOTIV0F = FEMALE*MOTIV0 
gen MOTIV6F = FEMALE*MOTIV6 
gen MOTIV67F = MOTIV67 *FEMALE 
gen COUPLF =COUPL*FEMALE
gen ENFF =ENF*FEMALE
gen PROCHAF =PROCHA*FEMALE 
gen AGEDF =AGED *FEMALE 
gen GRANDEECOF=FEMALE*GRANDEECO
gen SERIALF=FEMALE*SERIAL
gen EXPF=FEMALE*EXPERT
label var AGEDF "Female $\times$ Age $\geq$ 40"
label var PROCHAF "Female $\times$ Family entrepreneurs"
label var COUPLF "Female $\times$ Married"
label var ENFF "Female $\times$ Children"
// the next labels overwrite labels assigned earlier in this file
label var NATIOA1 "French"
label var GRANDEECO "Elite school"
label var GRANDEECOF "Female $\times$ Elite school"
label var SUPEREXPERT "Industry expert $\geq$10 years"
label var SUPERSERIAL "Serial entrepreneur $\geq$3 startups"
label var EXPF  "Female $\times$ Industry expert"
label var SERIALF "Female $\times$ Serial entrepreneur"
label var MOTIV4F "Female $\times$ Opportunity"
label var MOTIV3F "Female $\times$ New idea"
label var MOTIV2F "Female $\times$ Taste"
label var MOTIV1F "Female $\times$ Independence"
label var MOTIV5F "Female $\times$ Successful peer"
label var MOTIV6F "Female $\times$ Unemployed"
label var MOTIV0F "Female $\times$ Other motivations"
label var MOTIV1 "Independence"
gen OBJF=OBJECTIF*FEMALE
label var OBJF "Female $\times$ Growth-oriented"
gen INCF=FEMALE * INC
label var INCF "Female $\times$ Incorporated"
gen DIRIGAF =FEMALE*DIRIGA 
label var DIRIGAF "Female $\times$ Co-founder(s)"
gen FINOV=FEMALE*INOV_SUM
label var FINOV "Female $\times$ Innovative"
gen STATUS5F=STATUS5*FEMALE
gen STATUS4F=STATUS4*FEMALE
gen STATUS3F=STATUS3*FEMALE
// STATUS12 pools STATUS1 and STATUS2: 1 if either equals 1, 0 if both equal
// 0, missing otherwise. The zero condition previously tested STATUS3 instead
// of STATUS1, which reset to 0 every founder with STATUS1==1, STATUS2==0 and
// STATUS3==0.
gen STATUS12= 1 if STATUS1==1|STATUS2==1
replace STATUS12= 0 if STATUS1==0&STATUS2==0
gen STATUS12F=STATUS12*FEMALE
label var STATUS3F "Female $\times$ Previously employee"
label var STATUS4F "Female $\times$ Previously student"
label var STATUS5F "Female $\times$ Previously unemployed"
label var STATUS12 "Previously CEO"
label var STATUS12F "Female $\times$ CEO"
label var B2B "B2B business model"
gen B2BF=B2B*FEMALE
label var B2BF "Female $\times$ B2B"
gen GEOCLIA = 1-GEOCLIA1 if !missing(GEOCLIA1) // non local clientele
label var GEOCLIA "Non-local clientele"
gen GEOCLIAF=GEOCLIA*FEMALE
label var GEOCLIAF "Female $\times$ Non-local customers"

// External financing indicator built from FINV1, FINV2, FINV3 and FINV5.
// Stata evaluates a missing value as "true" in a logical expression, so the
// former one-liner (FINV5|FINV1|FINV2|FINV3) returned 1 as soon as one of the
// inputs was missing. The indicator is now
//   1  if at least one input is non-missing and non-zero,
//   0  if all four inputs are non-missing and zero,
//   .  otherwise.
// Results are unchanged for rows where all four inputs are observed.
gen FINVEXT2 = 0 if !missing(FINV1, FINV2, FINV3, FINV5)
foreach src of varlist FINV5 FINV1 FINV2 FINV3 {
    replace FINVEXT2 = 1 if `src' != 0 & !missing(`src')
}
// interactions with FEMALE (missing whenever either factor is missing)
gen FFINV1=FINV1*FEMALE
gen FFINV2=FINV2*FEMALE
gen FFINV3=FINV3*FEMALE
gen FFINV5=FINV5*FEMALE
gen FFINV6=FINV6*FEMALE
gen FFINV4 =FINV4*FEMALE
gen FDIFF7 = FEMALE*DIFF7
gen FFINVEXT2 =FINVEXT2*FEMALE
label var FFINV1 "Female $\times$ Bank loans"
label var FFINV2 "Female $\times$ Personal loans"
label var FFINV3 "Female $\times$ Other loans"
label var FFINV5 "Female $\times$ Public grants"
label var FFINV6 "Female $\times$ Microcredit"
label var FINV6 "Microcredit"
label var FFINV4 "Female $\times$ External equity"
label var DIFF7 "Difficulties getting fundings"
label var FDIFF7 "Female $\times$ Difficulty getting funding"
label var FINVEXT2 "External financing"
label var FFINVEXT2 "Female $\times$ External financing"
gen VC=FINV4vc if YEAR>=2010 // vc defined only after 2010
gen VCF =VC*FEMALE
label var VC "VC"
label var VCF "Female $\times$ VC"

* Other-income sources interacted with FEMALE.
generate FREVENB1 = REVENB1 * FEMALE
* REVENB23 pools REVENB2 and REVENB3; the sum is capped at 1
generate REVENB23 = REVENB2 + REVENB3
replace REVENB23 = 1 if REVENB23 > 1 & REVENB23 < .
generate FREVENB23 = REVENB23 * FEMALE
generate FREVENB5 = REVENB5 * FEMALE

label variable REVENB1 "Other income spouse"
label variable REVENB4 "Other income benefits"
label variable FREVENB1 "Female $\times$ Spouse income"
label variable REVENB23 "Other income employment"
label variable FREVENB23 "Female $\times$ Other employment income"
label variable FREVENB5 "Female $\times$ No other income"

* Co-founder variables
label variable DIRIG1A "No co-founder"
label variable DIRIG2A "Co-founded with spouse"
label variable DIRIG3A "Co-founded with family"
label variable DIRIG4A "Co-founded with business partners"
label variable DIRIGA "Co-founder(s)"
label variable CCD "Co-founded with spouse"
label variable CCA "Work with spouse"

* Outcome variables
label variable RESULTA "Net income"
label variable REX "EBIT"
label variable sales "Sales"
label variable lnsales "Ln(sales)"
label variable roa "ROA"
label variable SURV1 "Survival $\geq$1 year"
label variable SURV3 "Survival $\geq$3 years"
label variable SURV5 "Survival $\geq$5 years"

// Initial capital: CAPITAL = 1 when the MOYENS6A bracket code is 6 or above,
// 0 below, missing when MOYENS6A is missing
tabulate YEAR MOYENS6A, row
capture drop CAPITAL
generate CAPITAL = (MOYENS6A >= 6) if !missing(MOYENS6A)
tabulate YEAR CAPITAL, row
label variable CAPITAL "Startup capital $\geq$40k\euro"
generate CAPITALF = CAPITAL * FEMALE
label variable CAPITALF "Female $\times$ Startup capital $\geq$40k\euro"

// Sector-level variables: FEMALE times the female-dominated-sector dummies
// at levels 4 and 5, for the three sources (crea, sine, siren)
forvalues lvl = 4/5 {
    generate nf`lvl'crea = NAF`lvl'_crea_d5 * FEMALE
    label variable nf`lvl'crea "Female $\times$ F-dominated sector"
    generate nf`lvl'sine = NAF`lvl'_sine_d5 * FEMALE
    label variable nf`lvl'sine "Female $\times$ F-dominated sector (SINE)"
    generate nf`lvl'siren = NAF`lvl'_siren_d5 * FEMALE
    label variable nf`lvl'siren "Female $\times$ F-dominated sector (SIREN)"
}
generate APE2_INOVF = FEMALE * APE2_INOV
label variable APE2_INOVF "Female $\times$ Innovative sector"

// Quintiles of the sector profit margin within cohort (gquantiles is a
// user-written command); flags for the top and bottom quintile. The flags
// are 0, not missing, when the quintile is missing.
capture drop NAF5_margin_q NAF5_margin_high NAF5_margin_low
gquantiles NAF5_margin_q = NAF5_margin if !missing(NAF5_margin), xtile  nq(5) by(YEAR)
generate NAF5_margin_high = NAF5_margin_q == 5
generate NAF5_margin_low = NAF5_margin_q == 1
label variable NAF5_margin "Profit margin sector"
label variable NAF5_margin_high "High profit margin sector (Top quintile)"
generate NAF5_margin_high_f = NAF5_margin_high * FEMALE
label variable NAF5_margin_high_f "Female $\times$ High profit margin sector"

// Fixed effects: numeric group identifiers for sector, sector x cohort and
// sector x cohort x departement cells
egen apey=group(NAF5 YEAR)
egen ape2=group(NAF2)
egen ape5=group(NAF5)
egen ape3=group(NAF3)
egen ape4=group(NAF4)
egen apey4=group(NAF4 YEAR)
egen apey2=group(NAF2 YEAR)
egen apey3=group(NAF3 YEAR)
egen apey1=group(NAF1 YEAR)
// Departement = first two characters of the commune code. substr(s, n1, n2)
// returns n2 characters starting at position n1, so the former call
// substr(DEPCOMA,2,1) kept only the second character of the code.
// NOTE(review): assumes DEPCOMA is the INSEE commune code whose first two
// characters identify the departement -- confirm against the SINE codebook.
gen d = substr(DEPCOMA,1,2)
egen dep=group(d)
egen apeydep=group(NAF5 YEAR d)
drop d

// Sample restrictions
// - discard non-startups (e.g., franchise, subsidiaries); rows with a
//   missing STARTUP are kept
keep if STARTUP != 0
keep if FILIALA != "3"

// - keep after2002
keep if YEAR >= 2002

* panel-dataset: cohort sizes on the creation-year rows
tabulate YEAR if YEAR == Y
*save  "${file}sine_panel2.dta", replace

* Repeated cross-section: only the row of the creation year is kept, and the
* result overwrites sine_panel.dta
keep if YEAR == Y
save  "${file}sine_panel.dta", replace


